Female Nobel Laureates
R
Visuals
2022
Get Data
The Nobel Prize organisation publishes a public API, which is used here to download the raw laureate data.
# Get laureate data from the Nobel Prize API (single request, limit = 1000).
res1 <- GET("http://api.nobelprize.org/2.1/laureates?limit=1000")
# Fail loudly on an HTTP error instead of trying to parse an error body.
stop_for_status(res1)
# Decode the body explicitly as UTF-8 so accented names survive parsing.
json_laureate <- fromJSON(httr::content(res1, as = "text", encoding = "UTF-8"))
# The laureate records sit under the `laureates` element of the response.
laureate <- json_laureate$laureates

laureate %>% glimpse()
Rows: 981
Columns: 23
$ id <chr> "745", "102", "779", "259", "1004", "114", "982", "9…
$ knownName <df[,3]> <data.frame[26 x 3]>
$ givenName <df[,3]> <data.frame[26 x 3]>
$ familyName <df[,3]> <data.frame[26 x 3]>
$ fullName <df[,3]> <data.frame[26 x 3]>
$ fileName <chr> "spence", "bohr", "ciechanover", "klug", "gurnah"…
$ gender <chr> "male", "male", "male", "male", "male", "male", "…
$ birth <df[,2]> <data.frame[26 x 2]>
$ wikipedia <df[,2]> <data.frame[26 x 2]>
$ wikidata <df[,2]> <data.frame[26 x 2]>
$ sameAs <list> <"https://www.wikidata.org/wiki/Q157245", "https:…
$ links <list> [<data.frame[2 x 6]>], [<data.frame[2 x 6]>], [<d…
$ nobelPrizes <list> [<data.frame[1 x 12]>], [<data.frame[1 x 12]>], [<da…
$ death <df[,2]> <data.frame[26 x 2]>
$ orgName <df[,3]> <data.frame[26 x 3]>
$ acronym <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
$ founded <df[,2]> <data.frame[26 x 2]>
$ nativeName <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ penName <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ penNameOf <df[,1]> <data.frame[26 x 1]>
$ foundedCountry <df[,3]> <data.frame[26 x 3]>
$ foundedCountryNow <df[,3]> <data.frame[26 x 3]>
$ foundedContinent <df[,1]> <data.frame[26 x 1]>
Data Munging
Award Winner - Names
# Create one row per Nobel laureate with name, gender and birth details.
#
# The name and birth columns of `laureate` are nested data frames that each
# carry per-language columns such as `en`. `tidyr_legacy` de-duplicates the
# repeated names by suffixing (en, en1, en2); the renames below therefore rely
# on the column order in `laureate` (givenName, familyName, fullName).
df_laureate <- laureate %>%
  unnest(
    c(fullName, givenName, familyName, birth),
    names_repair = tidyr_legacy
  ) %>%
  select(id, en, en1, en2, gender, date, place) %>%
  rename(
    last_name = "en1",
    first_name = "en",
    full_name = "en2",
    birth_date = "date"
  ) %>%
  # `place` is itself nested: unpack it first, then its present-day city and
  # country, which again yield `en` / `en1` after name repair.
  unnest(place) %>%
  unnest(c(cityNow, countryNow), names_repair = tidyr_legacy) %>%
  select(
    id, full_name, first_name, last_name, birth_date, gender, en, en1
  ) %>%
  rename(
    birth_city = "en",
    birth_country = "en1"
  )

df_laureate %>% glimpse()
Rows: 981
Columns: 8
$ id <chr> "745", "102", "779", "259", "1004", "114", "982", "981",…
$ full_name <chr> "A. Michael Spence", "Aage Niels Bohr", "Aaron Ciechanov…
$ first_name <chr> "A. Michael", "Aage N.", "Aaron", "Aaron", "Abdulrazak",…
$ last_name <chr> "Spence", "Bohr", "Ciechanover", "Klug", "Gurnah", "Sala…
$ birth_date <chr> "1943-00-00", "1922-06-19", "1947-10-01", "1926-08-11", …
$ gender <chr> "male", "male", "male", "male", "male", "male", "male", …
$ birth_city <chr> "Montclair, NJ", "Copenhagen", "Haifa", "Zelvas", NA, "J…
$ birth_country <chr> "USA", "Denmark", "Israel", "Lithuania", NA, "Pakistan",…
Award Categories
# Create one row per awarded Nobel Prize (laureate id, year, category).
df_prize <- laureate %>%
  select(id, nobelPrizes) %>%
  # `nobelPrizes` is a list of data frames, one row per prize, so a laureate
  # with several prizes expands to several rows.
  unnest(nobelPrizes) %>%
  select(id, awardYear, category) %>%
  # `category` is a nested data frame of per-language labels; keep English.
  unnest(category) %>%
  select(id, awardYear, en) %>%
  rename(laureate_id = "id", award_year = "awardYear", category = "en")

df_prize %>% glimpse()
Rows: 989
Columns: 3
$ laureate_id <chr> "745", "102", "779", "259", "1004", "114", "982", "981", "…
$ award_year <chr> "2001", "1975", "2004", "1982", "2021", "1979", "2019", "2…
$ category <chr> "Economic Sciences", "Physics", "Chemistry", "Chemistry", …
Join the Data
# Combine the two data sets: every prize row gains its laureate's details.
df_prize_laureate <- df_prize %>%
  left_join(df_laureate, by = c("laureate_id" = "id")) %>%
  mutate(
    # The API delivers the award year as text; convert it to integer.
    award_year = as.integer(award_year),
    # Constant helper column so prizes can be tallied with sum().
    count = 1
  )

df_prize_laureate %>% glimpse()
Rows: 989
Columns: 11
$ laureate_id <chr> "745", "102", "779", "259", "1004", "114", "982", "981",…
$ award_year <int> 2001, 1975, 2004, 1982, 2021, 1979, 2019, 2019, 2009, 20…
$ category <chr> "Economic Sciences", "Physics", "Chemistry", "Chemistry"…
$ full_name <chr> "A. Michael Spence", "Aage Niels Bohr", "Aaron Ciechanov…
$ first_name <chr> "A. Michael", "Aage N.", "Aaron", "Aaron", "Abdulrazak",…
$ last_name <chr> "Spence", "Bohr", "Ciechanover", "Klug", "Gurnah", "Sala…
$ birth_date <chr> "1943-00-00", "1922-06-19", "1947-10-01", "1926-08-11", …
$ gender <chr> "male", "male", "male", "male", "male", "male", "male", …
$ birth_city <chr> "Montclair, NJ", "Copenhagen", "Haifa", "Zelvas", NA, "J…
$ birth_country <chr> "USA", "Denmark", "Israel", "Lithuania", NA, "Pakistan",…
$ count <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,…
Reshape the Data
# Reshape to one row per category and award year, classified by the gender
# make-up of that year's recipients.
df_grouping <- df_prize_laureate %>%
  # Add every category/year combination so years without an award exist as
  # rows. The upper bound follows the data instead of a hard-coded year.
  complete(
    category = unique(df_prize_laureate$category),
    award_year = 1901:max(df_prize_laureate$award_year, na.rm = TRUE)
  ) %>%
  group_by(category, award_year) %>%
  # Rows added by complete() have NA `count`, so their sums (and therefore
  # `grouping`) stay NA; the plot draws those tiles with its `na.value`.
  summarize(
    total_count = sum(count),
    male_count = sum(count[gender == "male"]),
    female_count = sum(count[gender == "female"]),
    # Same grouping as the default (still grouped by category), but explicit.
    .groups = "drop_last"
  ) %>%
  mutate(
    grouping = case_when(
      female_count == total_count ~ "Female",
      male_count == total_count ~ "Male",
      female_count > 0 ~ "Mixed Team"
    ),
    # NOTE(review): round() gives the nearest decade (1906 -> 1910), not the
    # decade containing the year; kept as is because nothing below uses it.
    award_decade = round(award_year / 10) * 10,
    # Facet label for the three stacked panels of the plot.
    year_split = case_when(
      award_year >= 1981 ~ "1981-2021",
      award_year >= 1941 ~ "1941-1980",
      award_year >= 1901 ~ "1901-1940"
    )
  )

# Categories in display order.
category_list <- c(
  "Physiology or Medicine", "Physics", "Chemistry",
  "Literature", "Peace", "Economic Sciences"
)
# Factor the categories; levels are reversed so the first entry of
# `category_list` ends up at the top of the y axis.
df_grouping$category <- factor(df_grouping$category, levels = rev(category_list))

# Fill palette for the `grouping` values.
pal <- c("#D90368", "#2274A5", "#F1C40F")

df_grouping %>% glimpse()
Rows: 732
Columns: 8
Groups: category [6]
$ category <fct> Chemistry, Chemistry, Chemistry, Chemistry, Chemistry, Ch…
$ award_year <int> 1901, 1902, 1903, 1904, 1905, 1906, 1907, 1908, 1909, 191…
$ total_count <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, NA, NA, 1, N…
$ male_count <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 2, 1, 1, 1, NA, NA, 1, N…
$ female_count <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, NA, NA, 0, N…
$ grouping <chr> "Male", "Male", "Male", "Male", "Male", "Male", "Male", "…
$ award_decade <dbl> 1900, 1900, 1900, 1900, 1900, 1910, 1910, 1910, 1910, 191…
$ year_split <chr> "1901-1940", "1901-1940", "1901-1940", "1901-1940", "1901…
Visualize
# Tile plot: one tile per category and year, coloured by recipient gender and
# split into three stacked panels by `year_split`.
g1 <- ggplot(
  df_grouping,
  aes(x = award_year, y = category, fill = grouping)
) +
  geom_tile(color = "white", width = .9, height = .9) +
  # Tiles whose `grouping` is NA are drawn in light grey.
  scale_fill_manual(
    values = pal,
    na.value = "grey85",
    guide = guide_legend(title.position = "top", title.hjust = 0.5)
  ) +
  facet_wrap(~ year_split, ncol = 1, scales = "free_x") +
  labs(
    title = "Nobel Prize Laureates",
    caption = "Data from Nobel Prize API",
    subtitle = 'Note: Some categories and years have more than one recipient, \n "Mixed Team" denotes a team with male and female laureates',
    x = "Year",
    y = "Category",
    fill = "Recipient Gender"
  ) +
  # Start from an empty theme and add back only the elements that are needed.
  theme_void() +
  theme(
    legend.position = "bottom",
    plot.title = element_text(hjust = 0.5, size = 20, vjust = 5, face = "bold"),
    plot.subtitle = element_text(hjust = 0.5, vjust = 6, size = 12),
    #axis.title.x = element_text(family = "nunito"),
    #axis.text = element_text(family = "nunito"),
    axis.text.y = element_text(hjust = 1, size = 10),
    strip.text.x = element_text(size = 12),
    plot.caption = element_text(size = 10, hjust = 0.95),
    plot.margin = unit(c(1.1, 0.8, 0.8, 0.8), "cm"),
    legend.spacing.x = unit(0.8, "cm"),
    legend.box.margin = margin(0, 0, 0.25, 0)
  )

g1
Awarded Women List
# Table of every prize awarded to a woman, earliest award first. There is one
# row per prize, so a laureate with two prizes appears twice.
df_prize_laureate %>%
  filter(gender == "female") %>%
  select(full_name, birth_country, award_year) %>%
  arrange(award_year) %>%
  rename(
    "Full Name" = full_name,
    "Country" = birth_country,
    "Year Awarded" = award_year
  ) %>%
  gt() %>%
  tab_header(title = md("**Nobel Laureate Women**"))
Nobel Laureate Women | ||
Full Name | Country | Year Awarded |
---|---|---|
Marie Curie, née Sklodowska | Poland | 1903 |
Baroness Bertha Sophie Felicita von Suttner, née Countess Kinsky von Chinic und Tettau | Czech Republic | 1905 |
Selma Ottilia Lovisa Lagerlöf | Sweden | 1909 |
Marie Curie, née Sklodowska | Poland | 1911 |
Grazia Deledda | Italy | 1926 |
Sigrid Undset | Denmark | 1928 |
Jane Addams | USA | 1931 |
Irène Joliot-Curie | France | 1935 |
Pearl Buck | USA | 1938 |
Gabriela Mistral | Chile | 1945 |
Emily Greene Balch | USA | 1946 |
Gerty Theresa Cori, née Radnitz | Czech Republic | 1947 |
Maria Goeppert Mayer | Poland | 1963 |
Dorothy Crowfoot Hodgkin | Egypt | 1964 |
Nelly Sachs | Germany | 1966 |
Elizabeth Williams | Northern Ireland | 1976 |
Mairead Corrigan | Northern Ireland | 1976 |
Rosalyn Yalow | USA | 1977 |
Mother Teresa | North Macedonia | 1979 |
Alva Myrdal | Sweden | 1982 |
Barbara McClintock | USA | 1983 |
Rita Levi-Montalcini | Italy | 1986 |
Gertrude B. Elion | USA | 1988 |
Aung San Suu Kyi | Myanmar | 1991 |
Nadine Gordimer | South Africa | 1991 |
Rigoberta Menchú Tum | Guatemala | 1992 |
Toni Morrison | USA | 1993 |
Christiane Nüsslein-Volhard | Germany | 1995 |
Wislawa Szymborska | Poland | 1996 |
Jody Williams | USA | 1997 |
Shirin Ebadi | Iran | 2003 |
Elfriede Jelinek | Austria | 2004 |
Linda B. Buck | USA | 2004 |
Wangari Muta Maathai | Kenya | 2004 |
Doris Lessing | Iran | 2007 |
Françoise Barré-Sinoussi | France | 2008 |
Ada E. Yonath | Israel | 2009 |
Carol W. Greider | USA | 2009 |
Elinor Ostrom | USA | 2009 |
Elizabeth H. Blackburn | Australia | 2009 |
Herta Müller | Romania | 2009 |
Ellen Johnson Sirleaf | Liberia | 2011 |
Leymah Gbowee | Liberia | 2011 |
Tawakkol Karman | Yemen | 2011 |
Alice Munro | Canada | 2013 |
Malala Yousafzai | Pakistan | 2014 |
May-Britt Moser | Norway | 2014 |
Svetlana Alexievich | Ukraine | 2015 |
Tu Youyou | China | 2015 |
Donna Strickland | Canada | 2018 |
Frances H. Arnold | USA | 2018 |
Nadia Murad Basee Taha | Iraq | 2018 |
Olga Tokarczuk | Poland | 2018 |
Esther Duflo | France | 2019 |
Andrea Ghez | USA | 2020 |
Emmanuelle Charpentier | France | 2020 |
Jennifer A. Doudna | USA | 2020 |
Louise Glück | USA | 2020 |
Maria Ressa | Philippines | 2021 |
Annie Ernaux | France | 2022 |
Carolyn R. Bertozzi | USA | 2022 |
Female Laureates Count by Country
# Number of prizes awarded to women per birth country, largest count first.
# Rows are prizes, not people, so a laureate with two prizes counts twice.
df_prize_laureate %>%
  filter(gender == "female") %>%
  count(Country = birth_country, name = "Country Count", sort = TRUE) %>%
  gt() %>%
  tab_header(title = md("**Nobel Laureate Women by Country**"))
Nobel Laureate Women by Country | |
Country | Country Count |
---|---|
USA | 16 |
France | 5 |
Poland | 5 |
Canada | 2 |
Czech Republic | 2 |
Germany | 2 |
Iran | 2 |
Italy | 2 |
Liberia | 2 |
Northern Ireland | 2 |
Sweden | 2 |
Australia | 1 |
Austria | 1 |
Chile | 1 |
China | 1 |
Denmark | 1 |
Egypt | 1 |
Guatemala | 1 |
Iraq | 1 |
Israel | 1 |
Kenya | 1 |
Myanmar | 1 |
North Macedonia | 1 |
Norway | 1 |
Pakistan | 1 |
Philippines | 1 |
Romania | 1 |
South Africa | 1 |
Ukraine | 1 |
Yemen | 1 |